# Analysis_CandidateLevel.R
# Correlate the number of voters for whom the candidate is the closest with the number of votes they receive.
# Nico Ravanilla, Michael Davidson & Allen Hicken
# September 2021
# Version 1.0
# Replication code


# Remove all (non-hidden) objects from the current environment so the script
# starts from an empty workspace. Attached packages are not affected.
rm(list=ls())

library(haven)
library(ggplot2)
library(gtools)
library(data.table)  # as.data.table() and the all[condition] row filters
library(gridExtra)   # grid.arrange() / arrangeGrob()

# All relative input/output paths below are resolved against this directory.
setwd("~/Google Drive/Project Networks_Sorg/Analyses/5_Analyses_033116/")

# Candidate Level Data
# The file path is passed positionally: the first argument of read_dta() is
# named `file` in current haven releases, so `path=` is rejected there as an
# unused argument. A positional argument works with old and new versions.
all <- as.data.table(read_dta("build/output/CandidateLevel.dta"))
  # Strip a trailing ".y" suffix from column names.
  # NOTE(review): columns ending in ".y" are renamed here, not dropped. A
  # subset() call that was meant to drop them discarded its result (a no-op)
  # and has been removed; confirm whether dropping was actually intended.
  names(all) <- gsub(x=names(all), pattern="\\.y$", replacement="")

# Plots
  # Log or Not?
  # Histograms of voteshare, log_voteshare, pred_voteshare and
  # log_pred_voteshare, written to a single page of one PDF file.
  pdf("analyses/output/figures/logOrNot.pdf")
    # 2 x 2 grid of panels, filled row by row
    par(mfrow=c(2,2))
    hist(all$voteshare)
    hist(all$log_voteshare)
    hist(all$pred_voteshare)
    hist(all$log_pred_voteshare)
  # Close the PDF device so the file is finalised
  dev.off()

  # Scatter Plots
  # Actual against predicted outcomes, each with the smooth curve drawn by
  # scatter.smooth().
  with(all, scatter.smooth(y=votes, pred_votes, ylab="Actual Votes Received", xlab="Predicted Votes Received"))
    # Reference line with intercept 0 and slope 1 (actual == predicted).
    # abline() needs the intercept as well as the slope: with only `b`
    # supplied it stops with "'a' and 'b' must be finite".
    abline(a=0, b=1)
  with(all, scatter.smooth(y=voteshare, pred_voteshare))
  with(all, scatter.smooth(y=log_voteshare, log_pred_voteshare))
  
  # Bgy Candidates Only
  # Votes received against predicted votes for rows with city_cand != 1:
  # points, a linear fit, and a red slope-1 reference line.
  # The outer parentheses print the plot as well as assigning it.
  (bcO <- ggplot(data=all[city_cand!=1], aes(x=pred_votes, y=votes)) +
    geom_smooth(method='lm') +
    geom_abline(slope=1, colour="red") +
    geom_point(aes(alpha=.5)) +
    ylab("Votes Received") +
    xlab("Predicted Votes\n(Closest Candidate Method)") +
    scale_alpha(guide = 'none') +
    coord_equal() +
    ggtitle("Barangay Candidates"))
  # Name the plot explicitly instead of relying on last_plot().
  ggsave(plot=bcO, filename="analyses/output/figures/votesPredVotes_bgyCands.pdf")
  
  # Municipal Candidates Only 
  # Same figure for rows with city_cand == 1.
  (mcO <- ggplot(data=all[city_cand==1], aes(x=pred_votes, y=votes)) +
    geom_smooth(method='lm') +
    geom_abline(slope=1, colour="red") +
    geom_point(aes(alpha=.5)) +
    ylab("Votes Received") +
    xlab("Predicted Votes\n(Closest Candidate Method)") +
    scale_alpha(guide = 'none') +
    coord_equal() +
    ggtitle("Municipal Candidates"))
  ggsave(plot=mcO, filename="analyses/output/figures/votesPredVotes_muniCands.pdf")

# Both panels side by side in one file. arrangeGrob() builds the combined
# layout without drawing it, so saving the file does not also render the
# figure on the active graphics device (which grid.arrange() would do).
ggsave(plot=gridExtra::arrangeGrob(bcO, mcO, ncol=2), filename="analyses/output/figures/votesPredVotes.pdf", height=3, width=9)

# Regressions
  # Raw Votes
  # OLS of votes on pred_votes; covariates are added cumulatively from m1 to
  # m6 and each model summary is printed. The names m1-m6 are reassigned by
  # the model sets that follow.
  m1 <- lm(votes ~ pred_votes, data = all)
  summary(m1)

  m2 <- lm(votes ~ pred_votes + factor(bgy_code), data = all)
  summary(m2)

  m3 <- lm(
    votes ~ pred_votes + factor(bgy_code) + log_bCent + log_degree,
    data = all
  )
  summary(m3)

  m4 <- lm(
    votes ~ pred_votes + factor(bgy_code) + log_bCent + log_degree + male +
      age,
    data = all
  )
  summary(m4)

  m5 <- lm(
    votes ~ pred_votes + factor(bgy_code) + log_bCent + log_degree + male +
      age + incumbent,
    data = all
  )
  summary(m5)

  m6 <- lm(
    votes ~ pred_votes + factor(bgy_code) + log_bCent + log_degree + male +
      age + incumbent + incumbent_lastname,
    data = all
  )
  summary(m6)

  # Vote Shares
  # Same cumulative specifications with voteshare regressed on
  # pred_voteshare. The names m1-m6 are reassigned again by the log models
  # that follow.
  m1 <- lm(voteshare ~ pred_voteshare, data = all)
  summary(m1)

  m2 <- lm(voteshare ~ pred_voteshare + factor(bgy_code), data = all)
  summary(m2)

  m3 <- lm(
    voteshare ~ pred_voteshare + factor(bgy_code) + log_bCent + log_degree,
    data = all
  )
  summary(m3)

  m4 <- lm(
    voteshare ~ pred_voteshare + factor(bgy_code) + log_bCent + log_degree +
      male + age,
    data = all
  )
  summary(m4)

  m5 <- lm(
    voteshare ~ pred_voteshare + factor(bgy_code) + log_bCent + log_degree +
      male + age + incumbent,
    data = all
  )
  summary(m5)

  m6 <- lm(
    voteshare ~ pred_voteshare + factor(bgy_code) + log_bCent + log_degree +
      male + age + incumbent + incumbent_lastname,
    data = all
  )
  summary(m6)
  
  # Log Vote Shares
  # log_voteshare regressed on log_pred_voteshare, with covariates added
  # cumulatively (log_age replaces age; m7 adds city_cand). These are the
  # m1-m7 objects in scope for the clustered standard errors and the exported
  # table further down.
  m1 <- lm(log_voteshare ~ log_pred_voteshare, data = all)
  summary(m1)

  m2 <- lm(log_voteshare ~ log_pred_voteshare + factor(bgy_code), data = all)
  summary(m2)

  m3 <- lm(
    log_voteshare ~ log_pred_voteshare + factor(bgy_code) + log_bCent +
      log_degree,
    data = all
  )
  summary(m3)

  m4 <- lm(
    log_voteshare ~ log_pred_voteshare + factor(bgy_code) + log_bCent +
      log_degree + male + log_age,
    data = all
  )
  summary(m4)

  m5 <- lm(
    log_voteshare ~ log_pred_voteshare + factor(bgy_code) + log_bCent +
      log_degree + male + log_age + incumbent,
    data = all
  )
  summary(m5)

  m6 <- lm(
    log_voteshare ~ log_pred_voteshare + factor(bgy_code) + log_bCent +
      log_degree + male + log_age + incumbent + incumbent_lastname,
    data = all
  )
  summary(m6)

  m7 <- lm(
    log_voteshare ~ log_pred_voteshare + factor(bgy_code) + log_bCent +
      log_degree + male + log_age + incumbent + incumbent_lastname +
      city_cand,
    data = all
  )
  summary(m7)
  
    # Cluster Corrected SEs
  # Coefficient tests for a fitted model using a cluster-corrected covariance
  # matrix.
  #   dat     : not used by the function; kept so existing calls that pass it
  #             keep working
  #   fm      : fitted model accepted by estfun()/sandwich() (here: lm objects)
  #   cluster : cluster identifier, one element per observation used in `fm`
  # Returns the coeftest() table (estimate, standard error, test statistic,
  # p-value) computed with the clustered covariance matrix.
  cl <- function(dat,fm, cluster){
    # require() merely returns FALSE when a package is missing, which would
    # only surface later as an obscure "could not find function" error, so
    # fail immediately instead. The packages are still attached (not just
    # loaded) because code outside this function may call them unqualified.
    for (pkg in c("sandwich", "lmtest")) {
      if (!require(pkg, quietly = TRUE, character.only = TRUE)) {
        stop("cl() needs the '", pkg, "' package to be installed", call. = FALSE)
      }
    }

    scores <- estfun(fm)
    N <- length(cluster)
    if (N != nrow(scores)) {
      stop("'cluster' has ", N, " elements but the model uses ", nrow(scores),
           " observations", call. = FALSE)
    }
    M <- length(unique(cluster))
    if (M < 2) {
      # M / (M - 1) below is infinite with a single cluster
      stop("cl() needs at least two clusters", call. = FALSE)
    }
    K <- fm$rank

    # Finite-sample correction for the number of clusters and parameters
    dfc <- (M/(M-1))*((N-1)/(N-K))
    # Sum the score contributions within each cluster (clusters x coefficients)
    uj <- apply(scores, 2, function(x) tapply(x, cluster, sum))
    vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
    coeftest(fm, vcovCL)
  }
  
  # Clustered standard errors (column 2 of each coeftest table), one vector
  # per model, collected in model order for the table export.
  #
  # m1 has no factor(bgy_code) term, so its cluster ids are taken from `all`,
  # restricted to the rows that entered the fit (matched through the row names
  # of the score matrix). estfun() is namespace-qualified because sandwich is
  # not attached at this point of the script; the unqualified call only
  # resolved because the argument is evaluated lazily, after cl() has attached
  # the package.
  m1se <- cl(
    dat=all, fm=m1,
    cluster=all$bgy_code[
      as.numeric(rownames(all)) %in% as.numeric(rownames(sandwich::estfun(m1)))
    ]
  )[,2]
  # For the remaining models the cluster ids come from the model frame, so
  # they line up with the observations actually used.
  m2se <- cl(dat=all, fm=m2, cluster=m2$model$`factor(bgy_code)`)[,2]
  m3se <- cl(dat=all, fm=m3, cluster=m3$model$`factor(bgy_code)`)[,2]
  m4se <- cl(dat=all, fm=m4, cluster=m4$model$`factor(bgy_code)`)[,2]
  m5se <- cl(dat=all, fm=m5, cluster=m5$model$`factor(bgy_code)`)[,2]
  m6se <- cl(dat=all, fm=m6, cluster=m6$model$`factor(bgy_code)`)[,2]
  m7se <- cl(dat=all, fm=m7, cluster=m7$model$`factor(bgy_code)`)[,2]
  ses <- list(m1se, m2se, m3se, m4se, m5se, m6se, m7se)
  
  
  # Export Tables
  # Write models m1-m7 to a LaTeX table, replacing the default standard errors
  # with the clustered ones in `ses` and hiding the barangay fixed-effect
  # coefficients.
  # NOTE(review): the barangay/candidate counts in add.lines are hard-coded,
  # and the note text reads "are based on from" (probably a typo). Both are
  # left untouched so the generated table stays identical.
  library(stargazer)
  stargazer(
    m1, m2, m3, m4, m5, m6, m7,
    title="The elasticity of actual candidate vote-shares with respect to share of voters closest to candidate.",
    omit="^factor\\(bgy",
    dep.var.labels="Log Vote Share",
    covariate.labels=c(
      "Log Share of Voters Closest to Candidate",
      "Log Betweenness Centrality",
      "Log Degree Centrality",
      "Male = 1",
      "Log Age",
      "Incumbent = 1",
      "Relative of Incumbent = 1",
      "City-level Candidate = 1"
    ),
    add.lines=list(
      c("Barangay Fixed Effects", "NO", rep("YES", 6)),
      c("No. of Barangay \\textit{(j)}", rep("63", 7)),
      c("No. of Candidates \\textit{(k)}", rep("1,811", 7))
    ),
    omit.stat = c("N","ser","F"),
    se=ses,
    intercept.bottom=TRUE,
    notes="\\parbox[t]{16cm}{Actual vote-shares are based on from the May 2013 Municipal Elections for city-level candidates (41 of them), and from the October 2013 Barangay Elections for barangay-level candidates (1,811 of them). Predicted vote-share of a candidate is the total number of individuals voting for the candidate in a hypothetical election in which each voter chooses the candidate to whom they are most closely related, divided by the total number of voters. Huber/White robust standard errors clustered at the barangay (village) level in parentheses. Significance at the 10\\% level is represented by *, at the 5\\% level by **, and at the 1\\% level by ***.}",
    notes.append=FALSE,
    notes.align = "l",
    out="analyses/output/tables/bgyCandsClosest.tex",
    label="tab:ClosestCand",
    font.size = "scriptsize"
  ) #  float.env="sidewaystable"
  # Manually: remove the extra notes column, split 1st covariate label across 2 rows
  
